---
title: "Analysis"
output: html_document
---

```{r set-true-if-figures-should-be-exported-as-used-in-the-document}
# Set to TRUE to write figures and tables to disk below "Output/".
export_figures <- TRUE

# Create every output folder that is still missing. Each folder is tested for
# its own existence (previously the table folders were only created when the
# figure folders were missing), and recursive = TRUE also creates "Output".
if (export_figures) {
  output_dirs <- c(
    "Output/figures main section",
    "Output/figures appendix",
    # folders for tables
    "Output/tables main section",
    "Output/tables appendix"
  )
  for (output_dir in output_dirs[!dir.exists(output_dirs)]) {
    dir.create(output_dir, recursive = TRUE)
  }
}

stargazer_opt <- "html" # or "text"
```

```{r}
# Party keys, display labels and plotting colours. The three vectors are
# position-aligned: element k of each refers to the same party.
parties <- c("cducsu", "spd", "afd", "fdp", "left", "greens")
parties_label <- c("CDU/CSU", "SPD", "AfD", "FDP", "The Left", "The Greens")
col_parties <- c("black", "red", "blue3", "yellow2", "hotpink1", "green3")
```


```{r Load data}
# # Manifesto data (mani_df$dict_cat_lab is based on the manifesto coding recoded into our categories, not classified by the dictionary!)

# Load every file found in the processed-data folder into the workspace.
processed_data_dir <- "Data_processed_for_analysis"
for (data_file in list.files(processed_data_dir)) {
  load(paste0(processed_data_dir, "/", data_file))
}
```

```{r load functions}
# Source every file found in the "Functions" folder.
functions_dir <- "Functions"
for (function_file in list.files(functions_dir)) {
  source(paste0(functions_dir, "/", function_file))
}
```

```{r}
library("stargazer")
library("MASS")
library("stringr")
library("nnet")
library("dplyr")
library("reshape")
library("sjPlot")
library("sjmisc")
library("ggplot2")
library("knitr")
library("lme4")
```


# Which words lead to the classification of tweets?

```{r words-that-classify, include = T}
# For every dictionary category and every party occurring in it, keep the five
# words with the highest count. The result is a list (one element per
# category) of lists (one element per party) of data frames with the columns
# "word" and "count". The numbers of categories and parties are taken from the
# data instead of being fixed at 18 and 6.
dic_categories <- unique(dic_word_count$category)
top_five_words_each_party_each_cat_list <- lapply(dic_categories, function(one_category) {
  words_one_cat <- dic_word_count[dic_word_count$category == one_category, ]
  parties_in_cat <- unique(words_one_cat$party)
  top_words_per_party <- lapply(parties_in_cat, function(one_party) {
    words_one_party <- words_one_cat[words_one_cat$party == one_party, ]
    words_one_party <- words_one_party[order(words_one_party$count, decreasing = TRUE), ]
    head(words_one_party[c("word", "count")], 5)
  })
  names(top_words_per_party) <- parties_in_cat
  top_words_per_party
})
names(top_five_words_each_party_each_cat_list) <- dic_categories
#print(top_five_words_each_party_each_cat_list)
```

```{r}
# Expand the word counts into one row per word occurrence: column
# "top_ten_each_issue" holds the word, column "V2" the category.
top_ten_each_issue <- rep(dic_word_count$word, dic_word_count$count)
top_ten_each_issue <- data.frame(cbind(top_ten_each_issue, rep(as.character(dic_word_count$category), dic_word_count$count)))


if (export_figures) {
  # Build one line per category: the category name followed by its most
  # frequent words. The lines are written in one go to the ".txt" file;
  # previously the ".txt" file was created empty and the lines were appended
  # to a second file without extension.
  # NOTE(review): the file is called "top ten" but 13 words are kept per
  # category -- confirm which is intended.
  top_ten_path <- "Output/tables appendix/tab_a3_dictionary_top_ten.txt"
  top_ten_issues <- as.character(unique(top_ten_each_issue$V2))
  top_ten_lines <- vapply(top_ten_issues, function(one_issue) {
    words_one_issue <- top_ten_each_issue$top_ten_each_issue[top_ten_each_issue$V2 == one_issue]
    most_frequent <- head(sort(table(words_one_issue), decreasing = TRUE), 13)
    paste(c(one_issue, names(most_frequent)), collapse = ", ")
  }, character(1))
  writeLines(top_ten_lines, top_ten_path)
}
```


# Descriptive analysis

```{r}
# Tweets per dictionary category (tweets can be classified more than once)
colSums(candidates_tweets[, dict_cat])

# Number of categories assigned to each tweet and a 0/1 flag for "at least one"
candidates_tweets$tweet_classified_count <- rowSums(candidates_tweets[, dict_cat])
table(candidates_tweets$tweet_classified_count)
candidates_tweets$tweet_classified <- ifelse(candidates_tweets$tweet_classified_count == 0, 0, 1)

# The same two variables based on the dict_cat_wo_pc columns
candidates_tweets$tweet_classified_count_wo_pc <- rowSums(candidates_tweets[, dict_cat_wo_pc])
candidates_tweets$tweet_classified_wo_pc <- ifelse(candidates_tweets$tweet_classified_count_wo_pc == 0, 0, 1)


# Share of tweets that can/cannot be classified (tweets can be classified twice)
tweets_by_party <- table(candidates_tweets$partyname)
classified_overall <- table(candidates_tweets$tweet_classified)
classified_by_party <- table(candidates_tweets$partyname, candidates_tweets$tweet_classified)

tweets_by_party # Total number of tweets per party
nrow(candidates_tweets) # Total number of tweets
classified_by_party # Tweets classified per party
classified_overall # total tweets that cannot be classified
classified_overall / sum(classified_overall) # total tweets that cannot be classified (percent)
classified_by_party / rowSums(classified_by_party) # Tweets classified per party

# Active accounts per party: number of distinct screen names per party. The
# parties are taken from the data instead of assuming exactly six.
party_names <- names(tweets_by_party)
number_of_act_acc <- vapply(party_names, function(one_party) {
  length(unique(candidates_tweets$screen_name_tolower[candidates_tweets$partyname == one_party]))
}, numeric(1))

# Active accounts
number_of_act_acc
# Average number of tweets per active account
tweets_by_party / number_of_act_acc

# Generation of a category per party table
cat_per_party <- sum_multiple_cols_by_group(df_col_subset(candidates_tweets, "foreign_affairs", "agriculture"), candidates_tweets$partyname)
election_content_comp <- data.frame("election_campaign" = cat_per_party[, "party_competition"], row.names = rownames(cat_per_party))
# remove party competition; a logical mask keeps all columns if the column is
# absent, whereas x[, -which(...)] would then drop every column
cat_per_party <- cat_per_party[, colnames(cat_per_party) != "party_competition"]
cat_share_per_party <- cat_per_party / rowSums(cat_per_party)
```

```{r}
# Tweets that are classified as party competition and nothing else
party_comp_only <- candidates_tweets$party_competition == 1 &
  candidates_tweets$tweet_classified_count_wo_pc == 0
party_comp_only_by_party <- table(candidates_tweets$partyname, party_comp_only)
round(party_comp_only_by_party / rowSums(party_comp_only_by_party), 4)


tab_class_party_comp <- round(
  party_comp_only_by_party /
    rowSums(table(candidates_tweets$partyname, candidates_tweets$party_competition)),
  4
)

# tweets classified twice
table(candidates_tweets$tweet_classified_wo_pc, candidates_tweets$party_competition) / nrow(candidates_tweets)

# Comparison of summed up shares and total
summed_shares <- colSums(cat_share_per_party[, order(colnames(cat_share_per_party))])
summed_counts <- colSums(cat_per_party[, order(colnames(cat_per_party))])
compare_share_and_total_twitter <- cbind(round(summed_shares, 4), summed_counts)
# sort the policy areas by their summed-up share
compare_share_and_total_twitter <- compare_share_and_total_twitter[order(compare_share_and_total_twitter[, 1]), ]
colnames(compare_share_and_total_twitter) <- c("Summed up share of tweets", "Abs. number")
```

```{r comparison of classification as topic or election campaign or both}
# Per-party counts: tweets with a content category, all tweets, unclassified tweets
election_content_comp$content <- table(candidates_tweets$partyname, candidates_tweets$tweet_classified_wo_pc)[, 2]
election_content_comp$number_of_tweets <- as.numeric(table(candidates_tweets$partyname))
election_content_comp$not_classified <- table(candidates_tweets$partyname, candidates_tweets$tweet_classified)[, 1]

# The same quantities as shares of all tweets of the party
tweets_total <- election_content_comp$number_of_tweets
election_content_comp$share_election_content <- election_content_comp$election_campaign / tweets_total
election_content_comp$share_content <- election_content_comp$content / tweets_total
election_content_comp$share_not_classified <- election_content_comp$not_classified / tweets_total

# Amount by which the three shares together exceed 1; it is stored as
# `overlap` and subtracted from the election-campaign and content shares.
share_excess <- election_content_comp$share_not_classified +
  election_content_comp$share_content +
  election_content_comp$share_election_content - 1
election_content_comp$overlap <- share_excess
election_content_comp$only_election_campaign <- election_content_comp$share_election_content - share_excess
election_content_comp$only_content <- election_content_comp$share_content - share_excess
```

```{r Table 1}
# table 1
candidates_per_party <- table(candidates$partyname)
tweets_per_party <- table(candidates_tweets$partyname)
share_content_pct <- round(election_content_comp$share_content * 100, 1)
only_election_campaign_pct <- round(election_content_comp$only_election_campaign * 100, 1)

# Show the individual rows of the table in the document
candidates_per_party
tweets_per_party
rownames(election_content_comp)
share_content_pct
only_election_campaign_pct

# deparse.level = 0 keeps the rows unnamed, as when the expressions are passed directly
exp_tab1 <- rbind(candidates_per_party, tweets_per_party, share_content_pct,
                  only_election_campaign_pct, deparse.level = 0)
write.table(exp_tab1, "Output/tables main section/tab1_overview.txt")
```


```{r Figure A.2 Classification rate of tweets for each party}
if (export_figures) {
  pdf("Output/figures appendix/figure_A2_classification_rate_of_tweets.pdf", height = 6, width = 9.5)
}
par(mar = c(4, 6, 4, 2))
# Stacked segments in drawing order; these are columns 9, 8, 10 and 7 of
# election_content_comp. Parties are put in reversed row order.
segment_cols <- c("only_election_campaign", "overlap", "only_content", "share_not_classified")
classification_shares <- t(as.matrix(
  election_content_comp[rev(rownames(election_content_comp)), segment_cols]
))
p <- barplot(classification_shares,
             horiz = TRUE, las = 1, xlim = c(0, 1), border = FALSE, main = "Classification of Tweets",
             col = c("darkred", "orange", "darkgreen", "grey80"), xlab = "Share",
             names.arg = rev(c("AfD", "CDU/CSU", "FDP", "The Greens", "The Left", "SPD")))
# Segment labels are centred over the segments of the sixth (topmost) bar
top_bar_shares <- classification_shares[, 6]
text(cumsum(top_bar_shares) - top_bar_shares / 2, p[6] + 0.7,
     c("Election campaign", "EC&Con", "Content", "Not classified"), xpd = TRUE)
if (export_figures) {
  dev.off()
}
```

# Figure A.1: Share of policy areas in manifestos and aggregated candidate tweets.

```{r top three policy areas}
# Tweet shares with the parties in the order of `parties`
tweet_shares_by_party <- cat_share_per_party[parties, ]

# Result of is_top_three() for the tweet shares, labelled like its input
cat_share_per_party_top_three <- is_top_three(tweet_shares_by_party, col_parties)
colnames(cat_share_per_party_top_three) <- colnames(tweet_shares_by_party)
rownames(cat_share_per_party_top_three) <- rownames(tweet_shares_by_party)

# Sort the policy areas by their summed-up share across parties (ascending)
policy_order_tweets <- order(colSums(cat_share_per_party))
col_top_three_sorted <- as.matrix(cat_share_per_party_top_three[, policy_order_tweets])

cat_share_per_party_sorted <- as.matrix(cat_share_per_party[parties, policy_order_tweets])
```

```{r top three manifesto}
# Party x category counts from the manifesto coding; columns are sorted by
# overall category frequency and the first (least frequent) column is dropped.
mani_counts <- table(mani_df$partynameshort, mani_df$dict_cat_lab)
mani_counts <- mani_counts[parties, order(table(mani_df$dict_cat_lab))][, -1]

# Row-wise shares, columns sorted by their summed-up share
party_share_of_topics_mani <- mani_counts / rowSums(mani_counts)
party_share_of_topics_mani <- party_share_of_topics_mani[, order(colSums(party_share_of_topics_mani))]

# Result of is_top_three() for the manifesto shares, labelled like its input
mani_shares_by_party <- party_share_of_topics_mani[parties, ]
cat_share_per_party_top_three_mani <- is_top_three(mani_shares_by_party, col_parties)
colnames(cat_share_per_party_top_three_mani) <- colnames(mani_shares_by_party)
rownames(cat_share_per_party_top_three_mani) <- rownames(mani_shares_by_party)

policy_order_mani <- order(colSums(party_share_of_topics_mani))
col_top_three_sorted_mani <- as.matrix(cat_share_per_party_top_three_mani[parties, policy_order_mani])

cat_share_per_party_sorted_mani <- as.matrix(party_share_of_topics_mani[parties, policy_order_mani])
```


```{r Figure A.1 Share of policy areas in manifestos and aggregated candidate tweets.,fig.height=6, fig.width=12.5, include=T}
if (export_figures) {
  pdf("Output/figures appendix/figure_A1_share_of_policy_areas.pdf", height = 6, width = 12.5)
}
par(mfrow = c(1, 2))

# ---- Left panel: manifestos ----
# One horizontal bar per policy area, split into six party segments.
par(mar = c(4, 12, 2, 2))
plot(0, 0, pch = "", xlim = c(0, 0.8), ylim = c(1, 17), axes = FALSE, xlab = "Share of Topics in Manifestos", ylab = "")
for (i in seq_len(ncol(cat_share_per_party_sorted_mani))) {
  rect_coords <- c(0, cumsum(cat_share_per_party_sorted_mani[, i]))
  # rect() is vectorised: segment k runs from rect_coords[k] to rect_coords[k + 1]
  rect(rect_coords[1:6], i - 0.4, rect_coords[2:7], i + 0.5,
       col = col_top_three_sorted_mani[1:6, i], border = "white")
}
axis(1)

lab <- colnames(cat_share_per_party_sorted_mani)
lab[10] <- "technology_infrastructure" #correct typo
axis(2, at = 1:17, labels = lab, las = 1, tick = FALSE, line = 0)

# Party labels are centred over the segments of the last bar drawn
label_pos <- rect_coords[1:6] + (rect_coords[2:7] - rect_coords[1:6]) / 2
text(label_pos, rep(ncol(cat_share_per_party_sorted_mani) + 0.4, 6), parties_label, xpd = TRUE, pos = 3, col = col_parties)


# Put the tweet matrices into the column order of the manifesto panel
cat_share_per_party_sorted <- cat_share_per_party_sorted[, colnames(cat_share_per_party_sorted_mani)]
col_top_three_sorted <- col_top_three_sorted[, colnames(col_top_three_sorted_mani)]

# ---- Right panel: tweets ----
# NOTE(review): the segments below are coloured with the manifesto colour
# matrix (col_top_three_sorted_mani); col_top_three_sorted is reordered above
# but not used in this chunk -- confirm that this is intended.
par(mar = c(4, 12, 2, 2))
plot(0, 0, pch = "", xlim = c(0, 0.65), ylim = c(1, 17), axes = FALSE, xlab = "Share of Topics in Tweets", ylab = "")
for (i in seq_len(ncol(cat_share_per_party_sorted))) {
  rect_coords <- c(0, cumsum(cat_share_per_party_sorted[, i]))
  rect(rect_coords[1:6], i - 0.4, rect_coords[2:7], i + 0.5,
       col = col_top_three_sorted_mani[1:6, i], border = "white")
}
axis(1)
lab <- colnames(cat_share_per_party_sorted)
lab[10] <- "technology_infrastructure" #correct typo
axis(2, at = 1:17, labels = lab, las = 1, tick = FALSE, line = 0)

# Labels alternate between two heights
label_pos <- rect_coords[1:6] + (rect_coords[2:7] - rect_coords[1:6]) / 2
text(label_pos, rep(ncol(cat_share_per_party_sorted), 6) + c(0.4, 0.9, 0.4, 0.9, 0.4, 0.9), parties_label, xpd = TRUE, pos = 3, col = col_parties)
if (export_figures) {
  dev.off()
}
```


# Figure 1: Issue concentration in manifestos and in online communication, aggregated by party.

```{r}
# Herfindahl index of the policy-area distribution per party, once for the
# manifestos and once for the tweets aggregated by party.

# Policy-area columns: everything from "foreign_affairs" to "agriculture"
# except "party_competition". setdiff() keeps all columns when there is nothing
# to remove, whereas x[-which(...)] would then select no column at all.
mani_policy_cols <- setdiff(
  colnames(mani_df)[match("foreign_affairs", colnames(mani_df)):match("agriculture", colnames(mani_df))],
  "party_competition"
)
tweet_policy_cols <- setdiff(
  colnames(candidates_tweets)[match("foreign_affairs", colnames(candidates_tweets)):match("agriculture", colnames(candidates_tweets))],
  "party_competition"
)

# Herfi for manifestos by party
mani_parties <- unique(mani_df$partyname)
herfi_mani <- vapply(seq_along(mani_parties), function(k) {
  calc_herfindahl_index(colSums(mani_df[mani_df$partyname == mani_parties[k], mani_policy_cols]))
}, numeric(1))
# Use the short party key of each manifesto party as merge key. It is looked up
# in the data (first row of each party) instead of being typed in the order in
# which unique() is assumed to return the parties.
mani_party_keys <- as.character(mani_df$partynameshort[match(mani_parties, mani_df$partyname)])
stopifnot(!anyNA(mani_party_keys))
herfi_mani <- data.frame(unique.mani_df.partyname. = mani_party_keys, herfi_mani = herfi_mani)

# Herfi for tweets by party (ignoring the politicians)
tweet_parties <- unique(candidates_tweets$partyname)
herfi_twit_agg <- vapply(seq_along(tweet_parties), function(k) {
  calc_herfindahl_index(colSums(candidates_tweets[candidates_tweets$partyname == tweet_parties[k], tweet_policy_cols]))
}, numeric(1))
herfi_twit_agg <- data.frame(unique.candidates_tweets.partyname. = tweet_parties, herfi_twit_agg = herfi_twit_agg)

herfi_comparison <- merge(herfi_mani, herfi_twit_agg, by.x = "unique.mani_df.partyname.", by.y = "unique.candidates_tweets.partyname.")
# Look the display label up by party key rather than relying on the row order of merge()
herfi_comparison$partylabel <- parties_label[match(as.character(herfi_comparison[[1]]), parties)]
stopifnot(!anyNA(herfi_comparison$partylabel))
```


```{r Figure 1: Issue concentration in manifestos and in online communication, aggregated by party., include=T}
if (export_figures) {
  pdf("Output/figures main section/figure_1_issue_concentration.pdf", height = 5, width = 7)
}
# Colours of the two bar series: manifesto first, tweets second
col_manifesto <- rgb(252, 141, 98, 255, maxColorValue = 255)
col_tweets <- rgb(141, 160, 203, 255, maxColorValue = 255)

par(mar = c(4, 4, 1, 1))
# Grouped bars per party; rows of the transposed matrix are the two indices
herfi_values <- t(as.matrix(herfi_comparison[, c(2, 3)]))
barplot(herfi_values, beside = TRUE, names.arg = herfi_comparison$partylabel, las = 1, ylim = c(0, 0.13),
        ylab = "Herfindahl-Hirschman index",
        col = c(col_manifesto, col_tweets),
        border = FALSE, main = "Issue concentration aggregated per party")
# Legend is pushed below the plot region (negative inset, xpd = TRUE)
legend("bottom", c("Manifesto", "Tweets"), inset = c(0, -.2), xpd = TRUE, horiz = TRUE, bty = "n",
       fill = c(col_manifesto, col_tweets),
       border = c(col_manifesto, col_tweets))
if (export_figures) {
  dev.off()
}
```

```{r}
# Minimum and maximum of the two index columns, rounded to two digits
round(vapply(herfi_comparison[, c(2, 3)], range, numeric(2)), 2)
```



# Core issues

```{r}
#### ---- Index 1: Top 3 topics of a party ------------------------------------------------------------------------------------------
number_of_top_issues <- 3
# Top topics based on our recoding of the manifesto categories
top_topics <- table(mani_df$dict_cat_lab, mani_df$partynameshort)
# Drop party competition with a logical mask; x[-which(...), ] would drop every
# row if the category were absent.
top_topics <- top_topics[rownames(top_topics) != "party_competition", ]

# For every party (column of top_topics) the names of its most frequent topics.
# Taking the names from the ordered row names also works for a single top issue.
top_topics_each_party <- lapply(seq_len(ncol(top_topics)), function(party_col) {
  ranked_topics <- rownames(top_topics)[order(top_topics[, party_col], decreasing = TRUE)]
  ranked_topics[seq_len(number_of_top_issues)] # top topics of this party
})
names(top_topics_each_party) <- colnames(top_topics)

candidates_tweets$partyname_factor <- factor(candidates_tweets$partyname, levels = c("afd", "cducsu", "fdp", "greens", "left", "spd"))

# Add dummy variable to candidates results data frame if a tweet is concerning a top 3 topic of a party or not
candidates_tweets$top_issue <- 0
for (i in seq_along(top_topics_each_party)) {
  # drop = FALSE keeps a data frame for rowSums() even with one top issue
  addresses_top_topic <- rowSums(candidates_tweets[, top_topics_each_party[[i]], drop = FALSE]) > 0
  candidates_tweets$top_issue <- ifelse(
    names(top_topics_each_party)[i] == candidates_tweets$partyname_factor & addresses_top_topic,
    1,
    candidates_tweets$top_issue
  )
}
```


```{r Table A.6 Core issues each party}
# Write one line per party: the party key followed by its core issues.
# writeLines() ends every element with a newline, so each party is guaranteed
# its own line; the file is replaced on every run.
if (export_figures) {
  core_issue_lines <- vapply(seq_along(top_topics_each_party), function(i) {
    paste(c(names(top_topics_each_party)[i], unlist(top_topics_each_party[i])), collapse = ", ")
  }, character(1))
  writeLines(core_issue_lines, "Output/tables appendix/tab_a6_core_issues_each_party.txt")
}
```


# Analysis over time

```{r preparation Figure A.4}
# Calendar day of each tweet and number of tweets per day
candidates_tweets$created_at_day <- as.Date(candidates_tweets$created_at)
tweets_per_day <- table(candidates_tweets$created_at_day)

# Dates highlighted in the figures
election_day <- as.Date("2017-09-24")
tv_debate <- as.Date("2017-09-03")

#  3.7. - 16.7. Week 1
# 17.7. - 30.7. Week 2
# 31.7. - 13.8. Week 3
# 14.8. - 27.8. Week 4
# 28.8. - 10.9. Week 5
# 11.9. - 24.9. Week 6
# 25.9. - 30.9. after election

# First and last day of every period listed above
period_starts <- as.Date(c("2017-07-03", "2017-07-17", "2017-07-31", "2017-08-14",
                           "2017-08-28", "2017-09-11", "2017-09-25"))
period_ends <- as.Date(c("2017-07-16", "2017-07-30", "2017-08-13", "2017-08-27",
                         "2017-09-10", "2017-09-24", "2017-09-30"))
df_two_weeks_windows <- data.frame(
  start_of_period = period_starts,
  end_of_period = period_ends,
  period_number = seq_along(period_starts)
)
```


```{r Figure A.4 Number of classified tweets over time.}
if (export_figures) {
  pdf("Output/figures appendix/figure_A4_number_classified_tweets_over_time.pdf", height = 4.5, width = 7)
}
par(mar = c(4, 4, 3, 4))
tweet_days <- as.Date(names(tweets_per_day))
plot(tweet_days, as.numeric(tweets_per_day),
     xlab = "Time", ylab = "Number of tweets",
     main = "Number of tweets per day",
     pch = 19,
     las = 1, bty = "n")

# Highlight and label election day
tweets_election_day <- tweets_per_day[names(tweets_per_day) == election_day]
points(election_day, tweets_election_day, col = "red", pch = 19)
text(election_day, tweets_election_day, labels = "Election day", pos = 4, xpd = TRUE)

# Highlight and label the TV debate
tweets_tv_debate <- tweets_per_day[names(tweets_per_day) == tv_debate]
abline(v = tv_debate, lty = "dashed", cex = 0.5, col = "red")
points(tv_debate, tweets_tv_debate, col = "red", pch = 19)
text(tv_debate, tweets_tv_debate, labels = "TV-Debate", pos = 4)

# Dotted line half a day before the start of each period, period number inside
period_starts <- df_two_weeks_windows$start_of_period
abline(v = period_starts - .5, lty = "dotted")
text(period_starts[1:6] + 7, 4000, labels = 1:6, col = rgb(0, 0, 0, alpha = 0.5), cex = 3)
if (export_figures) {
  dev.off()
}
```


```{r}
# Add period number of tweet creation within the two periods to the data frame
# (stays NA for tweets created outside all periods)
candidates_tweets$two_week_period <- NA
for (i in seq_len(nrow(df_two_weeks_windows))) {
  in_period <- which(between(candidates_tweets$created_at_day,
                             df_two_weeks_windows$start_of_period[i],
                             df_two_weeks_windows$end_of_period[i]))
  candidates_tweets$two_week_period[in_period] <- df_two_weeks_windows$period_number[i]
}

table(candidates_tweets$two_week_period)

# Per period: policy-area columns summed up by screen name.
# which() selects only the tweets of the period; a plain logical index would
# additionally return an all-NA row for every tweet whose period is NA.
policy_col_range <- which(colnames(candidates_tweets) == "foreign_affairs"):which(colnames(candidates_tweets) == "agriculture")
list_two_weeks_each_party_each_topic <- lapply(seq_len(nrow(df_two_weeks_windows)), function(i) {
  rows_in_period <- which(candidates_tweets$two_week_period == i)
  sum_multiple_cols_by_group(
    candidates_tweets[rows_in_period, policy_col_range],
    candidates_tweets[rows_in_period, which(colnames(candidates_tweets) == "screen_name_tolower")]
  )
})
```

```{r Model over time, include=T}
# Logistic regressions for a tweet addressing a core issue, estimated on the
# tweets with tweet_classified_wo_pc == 1 in periods 1 to 6.
tweets_for_time_models <- candidates_tweets[candidates_tweets$tweet_classified_wo_pc == 1 & candidates_tweets$two_week_period < 7, ]

# Baseline model: linear effect of the period
mod8_bino <- glm(top_issue ~ sex + age + partyname + two_week_period,
                 family = "binomial", data = tweets_for_time_models)

# Interaction model: period effect may differ by party.
# An earlier variant that additionally included `isListed` was fitted here and
# immediately overwritten by this model; that unused fit has been removed.
mod8_bino_int <- glm(top_issue ~ sex + age + partyname + two_week_period + two_week_period:partyname,
                     family = "binomial", data = tweets_for_time_models)
```


```{r Figure 3 Temporal development of tweets on core issues over time, include=T}
if (export_figures) {
  png("Output/figures main section/figure_3_temporal_development.png", height = 5, width = 8, units = "in", res = 300)
}
theme_set(theme_sjplot())
# Predictions of the interaction model over the periods, one line per party;
# the legend is suppressed here and drawn into a separate file below
figure_3_line_colors <- c("hotpink1", "green3", "blue3", "red", "black", "yellow2")
plot_model(mod8_bino_int, type = "pred", terms = c("two_week_period", "partyname"),
           colors = figure_3_line_colors,
           show.legend = FALSE, title = "Temporal development of a tweet addressing a core issues",
           axis.title = c("two-week intervals", "probability of a tweet concerning a core issue"))
if (export_figures) {
  dev.off()
}

if (export_figures) {
  pdf("Output/figures main section/figure_3_temporal_development_legend.pdf", height = 5, width = 2.5)
}
par(mar = c(0, 0, 0, 0))
# Empty plot that only carries the legend
plot(0, 0, pch = "", axes = FALSE, ylab = "", xlab = "")
legend_labels <- c("The Greens", "The Left", "SPD", "FDP", "AfD", "CDU/CSU")
legend_colors <- c("green3", "hotpink1", "red", "yellow2", "blue3", "black")
legend("center", legend = legend_labels, col = legend_colors, lwd = 3, bty = "n", cex = 1.6)
if (export_figures) {
  dev.off()
}
```


```{r Table A.8 Results from logistic regression analysis for a tweet addressing a ore issue over time, results='asis'}
# Regression table for both models; printed in the format given by
# stargazer_opt and also written to the file named in `out`
stargazer(
  mod8_bino, mod8_bino_int,
  type = stargazer_opt,
  out = "Output/tables appendix/tab_a8_core_issue_over_time.html"
)
```


# Share of topics in tweets vs share of topics in manifesto

```{r}
# cat_share_per_party in Manifestos based on our dictionary
cat_per_party_mani <- sum_multiple_cols_by_group(
  df_col_subset(mani_df, "foreign_affairs", "agriculture"),
  mani_df$partynameshort
)
drop_col_mani <- which(colnames(cat_per_party_mani) == "party_competition")
cat_per_party_mani <- cat_per_party_mani[, -drop_col_mani]
cat_share_per_party_mani <- cat_per_party_mani / rowSums(cat_per_party_mani)

# NOTE(review): the shares computed above are replaced right away by
# party_share_of_topics_mani with rows and columns sorted by name --
# confirm that the overwrite is intended.
cat_share_per_party_mani <- party_share_of_topics_mani[
  order(rownames(party_share_of_topics_mani)),
  order(colnames(party_share_of_topics_mani))
]
```

```{r}
# Long table with one row per policy area and party, holding the share in the
# manifesto and the share in the tweets.
# The tweet shares are put explicitly into the row (party) and column (policy)
# order of the manifesto shares, so that the two melted tables can be combined
# column-wise; the alignment is verified before combining.
tweet_shares_aligned <- cat_share_per_party[rownames(cat_share_per_party_mani), colnames(cat_share_per_party_mani)]

parties_share_topics <- melt(t(cat_share_per_party_mani))
temp <- melt(t(tweet_shares_aligned))
stopifnot(
  identical(as.character(parties_share_topics[[1]]), as.character(temp[[1]])),
  identical(as.character(parties_share_topics[[2]]), as.character(temp[[2]]))
)
parties_share_topics <- cbind(parties_share_topics, temp[, 3])
colnames(parties_share_topics) <- c("policy", "party", "share_in_manifesto", "share_in_tweets")
parties_share_topics$party <- factor(parties_share_topics$party, levels = c("cducsu", "spd", "afd", "fdp", "left", "greens"))
```

```{r Figure A.3 Scatterplot for the issue share in tweets based on the share of respective issue in the manifesto, fig.height=5, fig.width=7}
if (export_figures) {
  pdf("Output/figures appendix/figure_A3_scatterplot_issue_share_in_tweets.pdf", height = 6, width = 6)
}
# Two plot regions side by side: scatterplot (4/5 of the width) and legend (1/5)
layout(mat = matrix(c(1, 1, 1, 1, 2), ncol = 5))

par(mar = c(4, 4, 2, 0))
# One point per party and policy area, coloured by party
point_colors <- col_parties[as.factor(parties_share_topics$party)]
plot(parties_share_topics$share_in_manifesto, parties_share_topics$share_in_tweets,
     col = point_colors,
     pch = 19, xlim = c(0, 0.25), ylim = c(0, 0.25), las = 1, bty = "n",
     xlab = "Share in Manifesto", ylab = "Share in tweets")
# Dashed line with intercept 0 and slope 1
abline(coef = c(0, 1), lty = "dashed")

# Add legend
par(mar = c(0, 0, 0, 0))
plot(0, 0, pch = "", axes = FALSE, ylab = "", xlab = "")
legend("center", legend = parties_label, col = col_parties, xpd = TRUE, bty = "n", pch = 19)
if (export_figures) {
  dev.off()
}
```


# Electoral Security

```{r}
# Larger and smaller of the two safety values (list vs. district) per candidate
safety_list <- candidates$safe_list_luke
safety_dist <- candidates$safe_dist_luke
candidates$electoral_safety <- pmax(safety_list, safety_dist)
candidates$electoral_safety_2nd <- pmin(safety_list, safety_dist)

# Which of the two values is the larger one ("dist" also when they are equal)
candidates$which_elec_safety <- ifelse(safety_list > safety_dist, "list", "dist")
table(candidates$which_elec_safety)

# Missing distcand is set to 0
candidates$distcand[is.na(candidates$distcand)] <- 0

# 1 = distcand without dualcand, 0 otherwise
is_dist_only <- candidates$distcand == 1 & candidates$dualcand == 0
candidates$only_dist <- ifelse(is_dist_only, 1, 0)

# 1 = distcand without dualcand, 0 = listcand without dualcand, NA otherwise
is_list_only <- candidates$listcand == 1 & candidates$dualcand == 0
candidates$only_dist_no_dual <- ifelse(is_dist_only, 1, ifelse(is_list_only, 0, NA))
```

```{r Figure A.5 Distribution of candidates electoral security}
if (export_figures) {
  pdf("Output/figures appendix/figure_A5_distribution_candidates_electoral_security.pdf", height = 5, width = 7)
}
# Histogram of the electoral safety values of all candidates
hist(candidates$electoral_safety,
     main = "Electoral security of candidates",
     las = 1, xlab = "Electoral security", border = FALSE)
if (export_figures) {
  dev.off()
}

# Mean electoral safety, ignoring missing values
mean(candidates$electoral_safety, na.rm = TRUE)
```

```{r}
# Number of tweets on a core issue per account: second column of the
# screen name x top_issue table
top_issue_counts <- table(candidates_tweets$screen_name_tolower, candidates_tweets$top_issue)[, 2]

# Data frame with the screen name first and the count second; the row names are
# taken from the names of the count vector
number_tweets_top_issue <- data.frame(
  names.number_tweets_top_issue. = names(top_issue_counts),
  number_tweets_top_issue = top_issue_counts
)

# Attach the counts to the candidates (only candidates with a match are kept)
candidates <- merge(candidates, number_tweets_top_issue,
                    by.x = "twitter_name_tolower", by.y = "names.number_tweets_top_issue.")

# Share of core-issue tweets among all tweets and among classified tweets
core_issue_tweets <- candidates$number_tweets_top_issue
candidates$tweets_share_main_issue <- core_issue_tweets / candidates$politician_number_of_tweets
candidates$classified_tweets_share_main_issue <- core_issue_tweets / candidates$politician_number_of_classified_tweets

# Party as labelled factor
candidates$partyname_factor <- factor(
  candidates$partyname,
  levels = c("cducsu", "spd", "afd", "fdp", "left", "greens"),
  labels = c("CDU/CSU", "SPD", "AfD", "FDP", "The Left", "The Greens")
)
```


```{r}
# Share of core-issue tweets among the tweets classified without party competition
candidates$classified_tweets_share_main_issue_wo_party_comp <-
  candidates$number_tweets_top_issue / candidates$politician_number_of_classified_tweets_wo_pc

# Variables exported for the fractional response model estimated in Stata
frm_vars <- c(
  "classified_tweets_share_main_issue_wo_party_comp", "tweets_share_main_issue",
  "electoral_safety", "which_elec_safety", "politician_number_of_classified_tweets",
  "sex", "age", "partyname", "only_dist", "electoral_safety_2nd"
)
frm_data_electoral_safety <- candidates[, frm_vars]

# Rows with a non-missing outcome
has_outcome <- !is.na(frm_data_electoral_safety$classified_tweets_share_main_issue_wo_party_comp)
frm_data_electoral_safety_all_parties <- frm_data_electoral_safety[has_outcome, ]

# NOTE(review): the unfiltered data frame is exported here; the filtered
# frm_data_electoral_safety_all_parties is not used in this chunk -- confirm.
write.csv(frm_data_electoral_safety, file = "Data_prepared_for_frm_in_STATA/frm_data_electoral_safety_v4.csv")
```


```{r Figure 4b Marginal effects of electoral security on the share of tweets that address a core issue}
# add margins from stata
electoral_safety_margins <- haven::read_dta("Data_frm_results_from_STATA/electoral_safety_mod4_data_margins_with_afd.dta")
# drop the first character of every column name
colnames(electoral_safety_margins) <- substring(colnames(electoral_safety_margins), first = 2)


if (export_figures) {
  png("Output/figures main section/figure_4b_marginal_effects_electoral_security.png", height = 6, width = 7, units = "in", res = 300)
}
# Empty frame with fixed axis ranges
plot(0, 0, pch = "", ylim = c(0.28, 0.42), xlim = c(0, 1),
     xlab = "electoral security", ylab = "share of tweets on core issues",
     las = 1, bty = "n", main = "Prediction dependent on electoral security", axes = TRUE)

# Band between ci_ub and ci_lb (upper bound left to right, lower bound back)
safety_at <- electoral_safety_margins$at1
polygon(x = c(safety_at, rev(safety_at)),
        y = c(electoral_safety_margins$ci_ub, rev(electoral_safety_margins$ci_lb)),
        col = rgb(100, 100, 100, 100, maxColorValue = 255), lwd = 2, border = NA)
# Line of the margins
lines(safety_at, electoral_safety_margins$margin, col = "black", lwd = 2)
if (export_figures) {
  dev.off()
}
```

```{r}
# Print the margins table read from the Stata results file
electoral_safety_margins # margin at 0.4
```


# Expertise

```{r}
# Long format for candidates with any_ausschuss == TRUE: one row per candidate
# and "ausschuss_*" column; the column name goes to ausschuss_name and its
# value to ausschuss_dummy.
candidates_with_ausschuss <- candidates[candidates$any_ausschuss == TRUE, ]
candidates_long <- tidyr::pivot_longer(
  data = candidates_with_ausschuss,
  cols = c(
    "ausschuss_labour_social_affairs",
    "ausschuss_foreign_affairs",
    "ausschuss_interior",
    "ausschuss_education_culture",
    "ausschuss_technology_infrastucture",
    "ausschuss_agriculture",
    "ausschuss_european_affairs",
    "ausschuss_equality",
    "ausschuss_finance",
    "ausschuss_justice",
    "ausschuss_environment",
    "ausschuss_defence",
    "ausschuss_economics",
    "ausschuss_migration",
    "ausschuss_federalism"
  ),
  names_to = "ausschuss_name",
  values_to = "ausschuss_dummy"
)

# Keep the name from character 11 on, i.e. without the prefix "ausschuss_"
candidates_long$ausschuss_name <- str_sub(candidates_long$ausschuss_name, start = 11)
candidates_long$ausschuss_dummy <- factor(candidates_long$ausschuss_dummy)
```

```{r}
# Flag rows whose policy area (ausschuss_name) is one of the top topics of the
# candidate's party; all other rows keep 0
candidates_long$top_issue <- 0
for (i in seq_len(6)) {
  party_key <- names(top_topics_each_party)[i]
  is_party_top_topic <- party_key == candidates_long$partyname &
    candidates_long$ausschuss_name %in% top_topics_each_party[[i]]
  candidates_long$top_issue <- ifelse(is_party_top_topic, 1, candidates_long$top_issue)
}
```


```{r}
# For every row, copy the value of the column that is named like the row's
# ausschuss_name into number_tweets_in_ausschuss.
# The lookup is done once per policy column instead of once per row.
candidates_long$number_tweets_in_ausschuss <- numeric(length = nrow(candidates_long))
for (policy in unique(candidates_long$ausschuss_name)) {
  # fail with a clear message if the policy has no column of the same name
  if (!policy %in% colnames(candidates_long)) {
    stop("No column named '", policy, "' in candidates_long", call. = FALSE)
  }
  rows_for_policy <- which(candidates_long$ausschuss_name == policy)
  candidates_long$number_tweets_in_ausschuss[rows_for_policy] <- candidates_long[[policy]][rows_for_policy]
}
```


```{r}
candidates_long$ausschuss_dummy <- factor(candidates_long$ausschuss_dummy)

# Calculate candidates share of tweets for each policy
candidates_long$share_of_tweets_for_each_ausschuss <-
  candidates_long$number_tweets_in_ausschuss / candidates_long$politician_number_of_classified_tweets

# merge candidates_long and parties_share_topics by party and ausschuss
# (the merge key is "<party>_<policy>" in both tables)
parties_share_topics$party_policy <- paste(parties_share_topics$party, parties_share_topics$policy, sep = "_")
candidates_long$party_policy <- paste(candidates_long$partyname, candidates_long$ausschuss_name, sep = "_")

# Only the key and the manifesto share are added to candidates_long
temp_for_merge <- parties_share_topics[, c("party_policy", "share_in_manifesto")]
candidates_long <- merge(candidates_long, temp_for_merge, by.x = "party_policy", by.y = "party_policy")
rm(temp_for_merge)
```


```{r}
# Variables exported for the fractional response model estimated in Stata
frm_vars <- c(
  "share_of_tweets_for_each_ausschuss", "share_in_manifesto", "ausschuss_dummy",
  "politician_number_of_classified_tweets", "sex_factor", "age", "partyname_factor"
)
frm_data_expertise_aussschuss <- candidates_long[, frm_vars]

# Keep only rows with a non-missing outcome, then export
outcome_available <- !is.na(frm_data_expertise_aussschuss$share_of_tweets_for_each_ausschuss)
frm_data_expertise_aussschuss <- frm_data_expertise_aussschuss[outcome_available, ]
write.csv(frm_data_expertise_aussschuss, file = "Data_prepared_for_frm_in_STATA/frm_data_expertise_aussschuss.csv")
```


```{r}
# Margins from Stata for ausschuss_dummy = 0 and ausschuss_dummy = 1; the first
# character of every column name is dropped after reading
margins_path_dummy_0 <- "Data_frm_results_from_STATA/expertise_ausschuss_data_margins_ausschuss_dummy0.dta"
expertise_data_margins_dummy_0 <- haven::read_dta(margins_path_dummy_0)
colnames(expertise_data_margins_dummy_0) <- substring(colnames(expertise_data_margins_dummy_0), first = 2)

margins_path_dummy_1 <- "Data_frm_results_from_STATA/expertise_ausschuss_data_margins_ausschuss_dummy1.dta"
expertise_data_margins_dummy_1 <- haven::read_dta(margins_path_dummy_1)
colnames(expertise_data_margins_dummy_1) <- substring(colnames(expertise_data_margins_dummy_1), first = 2)
```

```{r Figure 4a Marginal effects of issue concentration}
if (export_figures) {
  png("Output/figures main section/figure_4a_marginal_effects_issue_concentration.png", height = 5.5, width = 7, units = "in", res = 300)
}
# Line colours (opaque) and band colours (same colour, alpha 100 of 255)
col_dummy_0 <- rgb(141, 160, 203, 255, maxColorValue = 255)
col_dummy_0_band <- rgb(141, 160, 203, 100, maxColorValue = 255)
col_dummy_1 <- rgb(252, 141, 98, 255, maxColorValue = 255)
col_dummy_1_band <- rgb(252, 141, 98, 100, maxColorValue = 255)

# Empty frame with fixed axis ranges
plot(0, 0, pch = "", ylim = c(0, 0.25), xlim = c(0, 0.28),
     xlab = "share in manifesto", ylab = "share in tweets", las = 1,
     main = "Prediction dependent on expertise", bty = "n")

# ausschuss_dummy = 0: band between ci_ub and ci_lb, then the margin line
at_dummy_0 <- expertise_data_margins_dummy_0$at1
polygon(x = c(at_dummy_0, rev(at_dummy_0)),
        y = c(expertise_data_margins_dummy_0$ci_ub, rev(expertise_data_margins_dummy_0$ci_lb)),
        col = col_dummy_0_band, lwd = 2, border = NA)
lines(at_dummy_0, expertise_data_margins_dummy_0$margin, col = col_dummy_0, lwd = 2)

# ausschuss_dummy = 1: band between ci_ub and ci_lb, then the margin line
at_dummy_1 <- expertise_data_margins_dummy_1$at1
polygon(x = c(at_dummy_1, rev(at_dummy_1)),
        y = c(expertise_data_margins_dummy_1$ci_ub, rev(expertise_data_margins_dummy_1$ci_lb)),
        col = col_dummy_1_band, lwd = 2, border = NA)
lines(at_dummy_1, expertise_data_margins_dummy_1$margin, col = col_dummy_1, lwd = 2)

legend("right", legend = c("Expertise", "No expertise"),
       col = c(col_dummy_1, col_dummy_0),
       xpd = TRUE, inset = c(-.07, 0), bty = "n", lwd = 3)
if (export_figures) {
  dev.off()
}
```


# Data for Table A.7 and Figure A.8

```{r}
# Flag party x policy rows that belong to a party's top issues.
# top_topics_each_party is indexed by position and by name: the element name
# is compared with parties_share_topics$party, and the element's content is
# the set of policies flagged for that party.
parties_share_topics$top_issue_dummy <- 0

# Iterate over however many parties the list holds instead of a hard-coded 6,
# so a shorter list does not fail on `[[i]]` and a longer one is not truncated.
for (i in seq_along(top_topics_each_party)) {
  is_top_issue <- names(top_topics_each_party)[i] == parties_share_topics$party &
    parties_share_topics$policy %in% top_topics_each_party[[i]]
  # Rows that do not match keep whatever value earlier iterations assigned.
  parties_share_topics$top_issue_dummy <- ifelse(
    is_top_issue, 1, parties_share_topics$top_issue_dummy
  )
}
```


```{r}
# Reshape the candidate data to long format: one row per candidate and policy
# field, with the former column name in policy_field_name and its value in
# policy_field_number_of_tweets.
candidates_long_all <- tidyr::pivot_longer(
  data = candidates,
  cols = c("foreign_affairs",
           "defence",
           "european_affairs",
           "justice",
           "federalism",
           "interior",
           "economics",
           "finance",
           "labour_social_affairs",
           "technology_infrastucture", # spelling matches the column name in the data
           "environment",
           "education_culture",
           "equality",
           "home_affairs",
           "migration",
           "multiculturalism",
           "agriculture"),
  names_to = "policy_field_name",
  values_to = "policy_field_number_of_tweets"
)

# Flag rows whose policy field is one of the top issues of the candidate's
# party. The loop runs over the actual length of top_topics_each_party rather
# than a hard-coded 6, so it stays correct if the number of parties changes.
candidates_long_all$top_issue <- 0

for (i in seq_along(top_topics_each_party)) {
  is_top_issue <- names(top_topics_each_party)[i] == candidates_long_all$partyname &
    candidates_long_all$policy_field_name %in% top_topics_each_party[[i]]
  candidates_long_all$top_issue <- ifelse(
    is_top_issue, 1, candidates_long_all$top_issue
  )
}

# Candidate's share of tweets in each policy field, relative to the number of
# that candidate's classified tweets.
candidates_long_all$share_of_tweets_for_each_policy_field <-
  candidates_long_all$policy_field_number_of_tweets /
  candidates_long_all$politician_number_of_classified_tweets

# Build the "<party>_<policy>" key on both tables and attach the manifesto
# share to every candidate row. merge() with its defaults keeps only keys
# present in both tables.
parties_share_topics$party_policy <- paste(parties_share_topics$party, parties_share_topics$policy, sep = "_")
candidates_long_all$party_policy <- paste(candidates_long_all$partyname, candidates_long_all$policy_field_name, sep = "_")

temp_for_merge <- parties_share_topics[, c("party_policy", "share_in_manifesto")]
candidates_long_all <- merge(candidates_long_all, temp_for_merge, by.x = "party_policy", by.y = "party_policy")
rm(temp_for_merge)
```

```{r data-post-analysis-for-stata}
# Export the party-level and the candidate-level (long) tables as CSV into the
# folder of inputs prepared for Stata.
write.csv(
  x = parties_share_topics,
  file = "Data_prepared_for_frm_in_STATA/post_analysis_data_parties.csv"
)
write.csv(
  x = candidates_long_all,
  file = "Data_prepared_for_frm_in_STATA/post_analysis_data_candidates_long.csv"
)
```

# Figure A.8

```{r}
# Load the margins table from the Stata results folder and drop the first
# character of every column name.
# NOTE(review): presumably this removes a leading underscore in the Stata
# variable names -- confirm against the .dta file.
individual_data_margins <- haven::read_dta(
  "Data_frm_results_from_STATA/individual_data_margins.dta"
)
names(individual_data_margins) <- substring(names(individual_data_margins), 2L)
```


```{r}
# Predictor matrix (x_frm) and outcome vector (y_frm) restricted to rows with
# a non-missing tweet share; both are used by the plotting chunks below.
frm_vars <- c("share_in_manifesto")

# Start from the full outcome column so the same missingness mask selects the
# predictor rows, then drop the missing outcomes themselves.
y_frm <- candidates_long_all$share_of_tweets_for_each_policy_field
x_frm <- as.matrix(candidates_long_all[!is.na(y_frm), frm_vars])
y_frm <- y_frm[!is.na(y_frm)]

colnames(x_frm) <- frm_vars
```


```{r Figure A.6 Comparison of the predicted proababilities}
# Appendix figure: compares the linear fit of y_frm on x_frm (drawn with
# abline(lm(...)), labelled "OLS") with the margins loaded from Stata
# (labelled "FLM"), including their confidence band.
# When export_figures is TRUE the plot is written to a PNG instead of the
# current graphics device.
if (export_figures) {
  png("Output/figures appendix/figure_A6_comparison_predicted_proababilities.png",
      height = 5, width = 7, units = "in", res = 300)
}

# Wider right margin to leave room for the legend placed outside the plot.
par(mar = c(4, 4, 2, 6))

# Empty canvas (pch = "" suppresses the points); only axes and labels are set.
plot(x_frm, y_frm, pch = "", ylim = c(0, 0.15), xlim = c(0, .25),
     xlab = "share in manifesto", ylab = "share in tweets", las = 1,
     bty = "n", main = "")

# Light grid lines.
abline(h = c(0, 0.05, 0.1, 0.15), col = "grey80")
abline(v = c(0, 0.05, 0.1, 0.15, .2, .25), col = "grey80")

# Linear regression line.
abline(lm(y_frm ~ x_frm), col = "coral", lwd = 2)

# Confidence band of the Stata margins: upper bound forward, lower bound
# reversed, so that the polygon closes.
polygon(x = c(individual_data_margins$at1, rev(individual_data_margins$at1)),
        y = c(individual_data_margins$ci_ub,
              rev(individual_data_margins$ci_lb)),
        col = rgb(141, 160, 203, 100, maxColorValue = 255), lwd = 2, border = NA)
lines(individual_data_margins$at1, individual_data_margins$margin,
      col = "#8da0cb", lwd = 2)

# xpd = TRUE allows drawing in the margin; TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
legend("right", legend = c("FLM", "OLS"), col = c("#8da0cb", "coral"),
       xpd = TRUE, inset = c(-.2, 0), lty = 1, lwd = 2)

if (export_figures) {
  dev.off()
}
```

# Figure 2

```{r Figure 2 left panel Marginal effects of the emphasis in the manifestos on individual online communication}
# Figure 2, left panel: margins loaded from Stata for all issues, drawn as a
# grey confidence band with a black line for the point prediction.
# Written to a PNG when export_figures is TRUE.
if (export_figures) {
  png("Output/figures main section/figure_2_left_panel.png",
      height = 5.5, width = 7, units = "in", res = 300)
}

# Empty canvas: pch = "" suppresses the points of x_frm / y_frm.
plot(x_frm, y_frm,
     pch = "", ylim = c(0, 0.25), xlim = c(0, 0.25),
     xlab = "share in manifesto", ylab = "share in tweets",
     las = 1, main = "Prediction for all issues", bty = "n")

# Band: upper bound left to right, then lower bound right to left.
with(individual_data_margins,
     polygon(x = c(at1, rev(at1)),
             y = c(ci_ub, rev(ci_lb)),
             col = rgb(100, 100, 100, 100, maxColorValue = 255),
             lwd = 2, border = NA))

# Point prediction.
with(individual_data_margins,
     lines(at1, margin, col = "black", lwd = 2))

if (export_figures) {
  dev.off()
}
```

```{r}
# Print the predicted margin at manifesto shares of 0.02 and 0.07 and the
# difference between the two.
#
# The grid values in at1 are floating-point numbers read from a Stata file, so
# an exact `==` comparison can silently match nothing (for example when the
# value was stored in single precision). Compare with a small tolerance
# instead.
margin_at <- function(share, tol = 1e-6) {
  individual_data_margins$margin[abs(individual_data_margins$at1 - share) < tol]
}

margin_at(0.02)
margin_at(0.07)

margin_at(0.07) - margin_at(0.02)
```



```{r}
# Interaction: margins from Stata, one table per value of the top-issue
# indicator (file suffixes "top_issue_0" and "top_issue_1"). As for the other
# margins tables, the first character of every column name is dropped.
individual_data_margins_top_issue_0 <- haven::read_dta(
  "Data_frm_results_from_STATA/individual_data_margins_subgroup_top_issue_0.dta"
)
names(individual_data_margins_top_issue_0) <- substring(
  names(individual_data_margins_top_issue_0), 2L
)

individual_data_margins_top_issue_1 <- haven::read_dta(
  "Data_frm_results_from_STATA/individual_data_margins_subgroup_top_issue_1.dta"
)
names(individual_data_margins_top_issue_1) <- substring(
  names(individual_data_margins_top_issue_1), 2L
)

# Keep only the part of each curve that is supported by data: at1 has to lie
# between the smallest and the largest manifesto share observed in
# parties_share_topics for the respective top_issue_dummy group.
individual_data_margins_top_issue_0 <- individual_data_margins_top_issue_0[
  individual_data_margins_top_issue_0$at1 >=
    min(parties_share_topics$share_in_manifesto[parties_share_topics$top_issue_dummy == 0]) &
    individual_data_margins_top_issue_0$at1 <=
      max(parties_share_topics$share_in_manifesto[parties_share_topics$top_issue_dummy == 0]),
]

individual_data_margins_top_issue_1 <- individual_data_margins_top_issue_1[
  individual_data_margins_top_issue_1$at1 >=
    min(parties_share_topics$share_in_manifesto[parties_share_topics$top_issue_dummy == 1]) &
    individual_data_margins_top_issue_1$at1 <=
      max(parties_share_topics$share_in_manifesto[parties_share_topics$top_issue_dummy == 1]),
]
```


```{r Figure 2 right panel Marginal effects of the emphasis in the manifestos on individual online communication}
# Figure 2, right panel: margins from Stata drawn separately for the two
# top-issue groups (top_issue_1 = "Core issues", top_issue_0 = "Other
# issues"), each as a semi-transparent confidence band plus a solid line.
# The legend is drawn as a separate plot, exported to its own PDF.
# With export_figures == FALSE both plots go to the current graphics device.

# Line colours (opaque) and band colours (same hue, alpha 100 of 255).
line_col_other_issues <- rgb(141, 160, 203, 255, maxColorValue = 255)
fill_col_other_issues <- rgb(141, 160, 203, 100, maxColorValue = 255)
line_col_core_issues <- rgb(252, 141, 98, 255, maxColorValue = 255)
fill_col_core_issues <- rgb(252, 141, 98, 100, maxColorValue = 255)

if (export_figures) {
  png("Output/figures main section/figure_2_right_panel.png",
      height = 5.5, width = 7, units = "in", res = 300)
}

# Empty canvas: pch = "" suppresses the points of x_frm / y_frm.
plot(x_frm, y_frm, pch = "", ylim = c(0, 0.25), xlim = c(0, 0.25),
     xlab = "share in manifesto", ylab = "share in tweets", las = 1,
     main = "Prediction for core and other issues", bty = "n")
# Grid lines are currently disabled:
#  abline(h=c(0,0.05,0.1,0.15,.2,.25), col="grey80")
#  abline(v=c(0,0.05,0.1,0.15,.2,.25), col="grey80")

# Other issues: upper bound forward, lower bound reversed to close the band.
polygon(x = c(individual_data_margins_top_issue_0$at1,
              rev(individual_data_margins_top_issue_0$at1)),
        y = c(individual_data_margins_top_issue_0$ci_ub,
              rev(individual_data_margins_top_issue_0$ci_lb)),
        col = fill_col_other_issues, lwd = 2, border = NA)
lines(individual_data_margins_top_issue_0$at1,
      individual_data_margins_top_issue_0$margin,
      col = line_col_other_issues, lwd = 2)

# Core issues.
polygon(x = c(individual_data_margins_top_issue_1$at1,
              rev(individual_data_margins_top_issue_1$at1)),
        y = c(individual_data_margins_top_issue_1$ci_ub,
              rev(individual_data_margins_top_issue_1$ci_lb)),
        col = fill_col_core_issues, lwd = 2, border = NA)
lines(individual_data_margins_top_issue_1$at1,
      individual_data_margins_top_issue_1$margin,
      col = line_col_core_issues, lwd = 2)

if (export_figures) {
  dev.off()
}

# Stand-alone legend: an empty plot without axes or margins that only carries
# the legend box.
if (export_figures) {
  pdf("Output/figures main section/figure_2_right_panel_legend.pdf",
      height = 5, width = 2.5)
}
par(mar = c(0, 0, 0, 0))
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
plot(0, 0, pch = "", axes = FALSE, ylab = "", xlab = "")
legend("center", legend = c("Core issues", "Other issues"),
       col = c(line_col_core_issues, line_col_other_issues),
       bty = "n", lwd = 3, cex = 1.6)
if (export_figures) {
  dev.off()
}
```


